from pyspark import SparkContext

# Demo: RDD set-style operations (union / intersection) and sorting.
sc = SparkContext('local')
try:
    rdd1 = sc.parallelize(['C', 'A', 'B', 'B'])
    rdd2 = sc.parallelize(['A', 'C', 'D', 'J'])

    # union keeps duplicates: every element of both RDDs appears.
    rdd3 = rdd1.union(rdd2)
    # intersection removes duplicates (and triggers a shuffle).
    rdd4 = rdd1.intersection(rdd2)

    print(rdd3.collect())
    print(rdd4.collect())

    # Sort the union ascending; elements are single-character strings,
    # so the element itself is the sort key (x[0] was equivalent but noisy).
    print(rdd3.sortBy(lambda x: x).collect())
finally:
    # Always release local Spark resources, even if a job above fails.
    sc.stop()